
%!TEX program = xelatex
%!TEX TS-program = xelatex
%!TEX encoding = UTF-8 Unicode

\documentclass[10pt]{article} 

\input{wang_preamble.tex}

\begin{document}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{center}
{\Large\bf \H 上海立信会计金融学院期终考试卷 } \hspace{0.3cm} {\Large \underline{ A }卷 解答}

\vspace{0.3cm}

{\large \bf \H 2020 $\sim$ 2021 学年 第 二 学期 }

\vspace{0.3cm}

{\large \bf \H \underline{ \emph{2019级数学与应用数学专业} } 《\underline{ \emph{统计软件} }》 课程代码：\underline{ 160960214}  }

\end{center}

\vspace{0.3cm}

每题10分，共100分，看答题情况给分。

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\begin{enumerate}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item %题目1：
在一个图书室的数据集里，保存有书籍的作者、书名、类别和页数等数据。数据框就是用来完成这个任务的。数据框也是一种特殊的列表，其中作者这个分量保存了所有书籍的作者，书名保存了所有书籍的名称，等等。测试下述程序创建了一个数据框。
{\color{blue}
\begin{verbatim}
> author<-c('dalgaard','karlin','higham','xia','jiang','li')
> title<-c('statistical software','stochastic processes','mathematical writing',
   'real analysis','mathematical modellings','numerical analysis')
> language<-c('English','English','English','Chinese','Chinese','Chinese')
> pages<-c(299,562,244,311,401,326)
> books<-data.frame(author=author,title=title,language=language,pages=pages)
> summary(books)
\end{verbatim}
}

判断下述说法的正误。在右边的括号里打 $\surd$ 或 $\times$.
\begin{enumerate}
\item  命令 {\color{blue}\verb+books[2]+} 将返回一个只保留所有书籍名称的数据框。\dotfill (\hspace{1cm})
\item  命令 {\color{blue}\verb+books[2,]+} 将返回 higham 的图书数据记录。\dotfill (\hspace{1cm})
\item  命令 {\color{blue}\verb+books[,4]+} 将返回一个只保留所有图书的页数的向量。\dotfill (\hspace{1cm})
\item  命令 {\color{blue}\verb+books$pages>300+} 将返回页数超过300的那些书籍的信息。\dotfill (\hspace{1cm})
\item  命令 {\color{blue}\verb+books$language+} 将返回一个有两个水平的因子型数据的向量。\dotfill (\hspace{1cm})
\end{enumerate}

{\color{red}解答：ace正确，bd错误。}

(b)命令 {\color{blue}\verb+books[2,]+} 将返回 karlin 的图书数据记录。

(d)命令 {\color{blue}\verb+books$pages>300+} 将返回布尔型数据向量 {\color{blue}\verb+(F,T,F,T,T,T)+}. 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item %题目2：
设某班的测试成绩如下。设显著性水平为 0.05, 使用 t 检验，推断平均成绩是否等于70分。
\begin{table}[ht!]
\centering
\begin{tabular}{|c|c|c|c|c|c|c|c|c|c|c|}\hline
学号 &1&2&3&4&5&6&7&8&9&10\\ \hline
成绩 & 70 &72 & 57 & 59 & 85 & 63 & 56 & 62 & 80 & 89 \\ \hline
\end{tabular}
\end{table}

%\begin{myenumerate}
%\item  是的，无法拒绝平均成绩等于70分的零假设。%{\color{blue}\verb+ >  +}
%\item  不是，拒绝平均成绩等于70分的零假设。%{\color{blue}\verb+ >  +}
%%\item  {\color{blue}\verb+ >  +}
%%\item  {\color{blue}\verb+ >  +}
%\end{myenumerate}

{\color{red}解答：运行下述程序，得到 $p$ 值0.8571, 大于显著性水平，因此无法拒绝零假设，认为测试的平均成绩等于70分。
%函数 {\color{blue}\verb+ sd( ) +} 
{\color{blue}
\begin{verbatim}
> x<-c(70,72,57,59,85,63,56,62,80,89)
> t.test(x,mu=70)
	One Sample t-test
data:  x
t = -0.18532, df = 9, p-value = 0.8571
alternative hypothesis: true mean is not equal to 70
95 percent confidence interval:
 60.75522 77.84478
sample estimates:
mean of x 
     69.3 
\end{verbatim}
}       

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item %题目3：
计算下述事件的概率。把代码和结果写在题目下面。
\begin{enumerate}
\item  一个标准正态分布的随机变量取值大于0.5的概率。
\item  一个均值为2的指数分布的随机变量取值大于3的概率。
\item  一个自由度为5的卡方分布的随机变量取值大于5的概率。
\item  一个均匀的骰子扔10次，数字5或6出现至少4次的概率。
\item  一个均值为10的泊松分布的随机变量取值大于12的概率。
\end{enumerate}

{\color{red}解答：第4小题是二项分布 $b(n,p)$, 其中 $n=10,p=1/3$.
{\color{blue}
\begin{verbatim}
> 1-pnorm(0.5)
[1] 0.3085375
> 1-pexp(3,rate=1/2)
[1] 0.2231302
> 1-pchisq(5,df=5)
[1] 0.4158802
> 1-pbinom(3,10,1/3)
[1] 0.4407357
> 1-ppois(12,10)
[1] 0.2084435
\end{verbatim}
}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item %题目4：
设 $X_1,X_2,\cdots, X_n$ 是独立同分布的一列随机变量，其分布函数是 $F(x)$. 定义经验分布函数为 
\begin{eqnarray*}
\hat{F}_n(x)=\frac{1}{n} \sum\limits_{i=1}^{n} I\{X_i\le x\}.
\end{eqnarray*}
其中当 $X_i\le x$ 时 $I\{X_i\le x\}=1$, 否则等于0. Glivenko-Cantelli 定理是说经验分布函数与总体分布函数的最大差异
几乎必然（以概率1）收敛于0, 即
\begin{eqnarray*}
\sup\limits_{x\in\mathbb{R}} | \hat{F}_n(x)-F(x)| \to 0, \,\,\, \mathrm{a.s.}
\end{eqnarray*}
写一段程序来验证这个定理。

{\color{red}解答：我们分别产生样本容量为50, 100, 300 和 500 的标准正态分布的4个样本，然后分别画出经验分布函数，
并与标准正态分布的分布函数进行比较。 
{\color{blue}
\begin{verbatim}
> opar<-par(mfrow=c(2,2),mex=0.6,mar=c(5,5,3,4)+0.8)
> x<-rnorm(50); n=length(x)
> plot(sort(x),(1:n)/n,type='s',ylim=c(-0.1,1.1))
> curve(pnorm(x),add=T)
> x<-rnorm(100); n=length(x)
> plot(sort(x),(1:n)/n,type='s',ylim=c(-0.1,1.1))
> curve(pnorm(x),add=T)
> x<-rnorm(300); n=length(x)
> plot(sort(x),(1:n)/n,type='s',ylim=c(-0.1,1.1))
> curve(pnorm(x),add=T)
> x<-rnorm(500); n=length(x)
> plot(sort(x),(1:n)/n,type='s',ylim=c(-0.1,1.1))
> curve(pnorm(x),add=T)
\end{verbatim}
}

\begin{figure}[H]
\centering
\includegraphics[height=8cm, width=12cm]{glivenko.png}
\caption{Glivenko-Cantelli定理的一个例子}
\end{figure}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item %题目5：
设有简单随机样本 $X_1,\cdots,X_n$ 来自正态分布 $N(\mu,\sigma^2)$.\\  
判断下述说法的正误。在右边的括号里打 $\surd$ 或 $\times$.
\begin{enumerate}
\item 统计量 $U=\frac{\bar{X}-\mu}{\sigma/\sqrt{n}}$ 服从标准正态分布 $N(0,1)$. \dotfill (\hspace{1cm})
\item 统计量 $t=\frac{\bar{X}-\mu}{S/\sqrt{n}}$ 服从分布 $t(n-1)$. \dotfill (\hspace{1cm})
\item 当总体方差已知的时候，使用 $t$ 统计量。 \dotfill (\hspace{1cm})
\item 当总体方差未知的时候，使用 $U$ 统计量。 \dotfill (\hspace{1cm})
\item 当 $n$ 很大时，自由度为 $n-1$ 的 $t$ 分布越来越接近标准正态分布。 \dotfill (\hspace{1cm})
\end{enumerate}

{\color{red}解答：(abe)正确，(cd)错误。\\ 
当总体方差未知的时候，使用 $t$ 统计量，当总体方差已知的时候，使用 $U$ 统计量。
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item %题目6：
%设数据 $X_1,\cdots,X_m$ 来自正态总体 $X\sim N(\mu_1,\sigma_1^2)$, 数据 $Y_1,\cdots,Y_n$ 来自另一个与总体 $X$ 独立的正态总体 $Y\sim N(\mu_2,\sigma_2^2)$. 考虑假设检验 $H_0: \mu_1=\mu_2, \,\text{ vs. }\, H_1: \mu_1\neq \mu_2$.
对冷却到 $-0.72$\textcelsius 的样品用 A,B两种测量方法测量其融化到 $0$\textcelsius 时的潜热，数据如下。

方法A：79.98, 80.04, 80.02, 80.04, 80.03, 80.03, 80.04, 79.97, 80.05, 80.03, 80.02, 80.00, 80.02.

方法B：80.02, 79.94, 79.98, 79.97, 80.03, 79.95, 79.97, 79.97. 

假设它们来自正态总体。判断它们的方差是否相等，并检验两种测量方法的平均性能是否相等。（$\alpha=0.05$）

{\color{red}解答：结果显示，方差检验的 $p$ 值为 $0.3938$, 所以接受方差相等的零假设。两样本 $t$ 检验（此处为 R 默认采用的 Welch 检验）的 $p$ 值为 $0.006939$, 所以拒绝均值相等的零假设。

{\color{blue}
\begin{verbatim}
> x<-c(79.98, 80.04, 80.02, 80.04, 80.03, 80.03, 80.04, 79.97, 80.05, 
       80.03, 80.02, 80.00, 80.02)
> y<-c(80.02, 79.94, 79.98, 79.97, 80.03, 79.95, 79.97, 79.97)

> var.test(x,y)

	F test to compare two variances

data:  x and y
F = 0.58374, num df = 12, denom df = 7, p-value = 0.3938
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.1251097 2.1052687
sample estimates:
ratio of variances 
         0.5837405 
> t.test(x,y)

	Welch Two Sample t-test

data:  x and y
t = 3.2499, df = 12.027, p-value = 0.006939
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 0.01385526 0.07018320
sample estimates:
mean of x mean of y 
 80.02077  79.97875 
 
\end{verbatim}
}

}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item %题目7：
载入程序包 ISwR 中的数据框 \,{\color{blue}\verb+rmr+}. 
\begin{enumerate}
\item  画出代谢率关于体重的散点图。
\item  对这个关系拟合一条直线。
\item  这个模型预测60公斤的体重对应的代谢率是多少？
\item  对这条直线的斜率给出一个置信度为90\%的置信区间。
\end{enumerate}

{\color{red}解答：使用lm函数建立回归模型，使用abline函数画出回归直线，使用predict函数进行预测，使用confint函数求得参数的置信区间。

{\color{blue}
\begin{verbatim}
> attach(rmr)
> plot(rmr)
> lm01<-lm(metabolic.rate~body.weight)
> abline(lm01)
> predict(lm01,newdata=data.frame(body.weight=60))
       1 
1234.798 
> confint(lm01,level=0.90)
                   5 %       95 %
(Intercept) 681.757545 940.695804
body.weight   5.415255   8.703801
\end{verbatim}
}

\begin{figure}[H]
\centering
\includegraphics[height=5cm, width=8cm]{rmr.png}
\caption{代谢率对体重的回归分析}
\end{figure}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item %题目8：
考虑两种治疗胃溃疡的药物，其中药物 Pirenzepine 治愈23人，未治愈7人。 药物 Trithiozine 治愈18人，未治愈13人。
使用 \,{\color{blue}\verb+prop.test+}, 卡方检验和 Fisher 精确检验，计算治愈率之差的95\%置信区间。%（$\alpha=0.05$）

{\color{red}解答：治愈率之差的95\%置信区间是 $[-0.077, 0.449]$.

{\color{blue}
\begin{verbatim}
> prop.test(c(23,18),c(30,31))

	2-sample test for equality of proportions with continuity correction

data:  c(23, 18) out of c(30, 31)
X-squared = 1.6243, df = 1, p-value = 0.2025
alternative hypothesis: two.sided
95 percent confidence interval:
 -0.07716531  0.44920832
sample estimates:
   prop 1    prop 2 
0.7666667 0.5806452 

> M<-matrix(c(23,18,7,13),2,2)
> M
     [,1] [,2]
[1,]   23    7
[2,]   18   13

> fisher.test(M)

	Fisher's Exact Test for Count Data

data:  M
p-value = 0.1737
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.6936416 8.4948588
sample estimates:
odds ratio 
  2.339104 

> chisq.test(M)

	Pearson's Chi-squared test with Yates' continuity correction

data:  M
X-squared = 1.6243, df = 1, p-value = 0.2025
\end{verbatim}
}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item %题目9：
在红细胞叶酸盐浓度的数据框 \,{\color{blue}\verb+red.cell.folate+} 中，\,{\color{blue}\verb+ventilation+} 是一个分类变量，区分三种不同的手术麻醉情况。使用单因素方差分析，判断不同麻醉情况下， 红细胞叶酸盐的浓度是否有显著差异？（$\alpha=0.05$）

{\color{red}解答：方差分析结果显示 $p$ 值为0.04359, 略小于显著性水平。因此认为在不同手术麻醉情况下，病人的红细胞叶酸盐的浓度有显著差异。

{\color{blue}
\begin{verbatim}
> ?red.cell.folate
> attach(red.cell.folate)
> summary(red.cell.folate)
     folate          ventilation
 Min.   :206.0   N2O+O2,24h:8   
 1st Qu.:249.5   N2O+O2,op :9   
 Median :274.0   O2,24h    :5   
 Mean   :283.2                  
 3rd Qu.:305.5                  
 Max.   :392.0                  
> anova(lm(folate~ventilation))
Analysis of Variance Table

Response: folate
            Df Sum Sq Mean Sq F value  Pr(>F)  
ventilation  2  15516  7757.9  3.7113 0.04359 *
Residuals   19  39716  2090.3                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\end{verbatim}
}

}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\item %题目10：
数据集 \,{\color{blue}\verb+coking+} 描述了不同炉宽和不同炉温的条件下，从煤炭炼制焦炭所需要的时间。
使用双因素方差分析，研究炉温和炉宽这两个因素的交互作用是否显著。

{\color{red}解答：考虑带这两个因子的交互效应的线性模型，方差分析结果表明交互项的系数是显著的。

{\color{blue}
\begin{verbatim}
> attach(coking)
> anova(lm(time~width*temp))
Analysis of Variance Table

Response: time
           Df  Sum Sq Mean Sq F value    Pr(>F)    
width       2 123.143  61.572 222.102 3.312e-10 ***
temp        1  17.209  17.209  62.076 4.394e-06 ***
width:temp  2   5.701   2.851  10.283  0.002504 ** 
Residuals  12   3.327   0.277                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
\end{verbatim}
}

}






%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\end{enumerate}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



\end{document}


